// Title      : Structure.java
// Author     : James Baird
// Created    : Monday, 8th October 2001
// Description: Structure Class

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Vector;
import org.apache.xerces.parsers.DOMParser;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Command-line tool that reads XML group logs from a directory and, for every
 * group whose first post carries the requested day, writes a text file
 * <code>&lt;group name&gt;.txt</code> into the same directory.
 *
 * The code below relies on the following document layout: the root element
 * has a "name" attribute (and optionally "volume"); each child element of the
 * root is a post with "day", "subject" and "article" attributes; the first
 * child element of a post is its header and the second is its body; header
 * and body contain one child per line, whose first child node holds the text.
 *
 * Only element children are considered when locating posts, headers and
 * bodies, so whitespace text nodes and comments between them are ignored.
 */
public class Structure
{
  /**
   * Entry point. Expects the day number and the logs directory.
   *
   * @param args <code>args[0]</code> is the day (an integer),
   *             <code>args[1]</code> is the logs directory
   */
  public static void main(String[] args)
  {
    if (args.length < 2)
    {
      System.out.println("Usage: Structure <day> <logs path>");
      return;
    }

    int day;

    try
    {
      day = Integer.parseInt(args[0]);
    }
    catch (NumberFormatException e)
    {
      // A non-numeric day is a usage error, not a crash.
      System.out.println("Usage: Structure <day> <logs path>");
      System.out.println("<day> must be an integer, got: " + args[0]);
      return;
    }

    sample(day, args[1]);
  }

  /**
   * Parses every regular file in <code>path</code> and exports the groups
   * whose first post has the given day.
   *
   * A file that cannot be read or parsed is reported on standard error and
   * skipped; the remaining files are still processed. This matters because
   * the generated <code>.txt</code> files live in the same directory and are
   * therefore encountered on later runs.
   *
   * @param day  day number to select
   * @param path directory holding the group logs
   */
  protected static void sample(int day, String path)
  {
    if (!new File(path).isDirectory())
    {
      System.err.println("Not a directory: " + path);
      return;
    }

    String[] index = getGroupIndex(path);

    System.out.println(index.length + " group(s) read");

    for (int i = 0; i < index.length; i++)
    {
      File source = new File(path, index[i]);

      // Sub-directories cannot be parsed as logs.
      if (!source.isFile())
        continue;

      try
      {
        DOMParser parser = new DOMParser();

        parser.parse(path + "/" + index[i]);

        Document document = parser.getDocument();

        if (getDay(0, document) == day)
          exportGroup(path, document);
      }
      catch (IOException e)
      {
        reportSkipped(source, e);
      }
      catch (SAXException e)
      {
        reportSkipped(source, e);
      }
      catch (NumberFormatException e)
      {
        // Raised by a missing or non-numeric "day" attribute.
        reportSkipped(source, e);
      }
    }
  }

  /**
   * Lists the entries of a directory.
   *
   * @param path directory to list
   * @return the entry names; an empty array when <code>path</code> is not a
   *         directory or cannot be listed (never <code>null</code>)
   */
  protected static String[] getGroupIndex(String path)
  {
    // File.list() returns null rather than throwing when listing fails.
    String[] result = new File(path).list();

    return result != null ? result : new String[0];
  }

  /**
   * Returns the "day" attribute of the post at <code>index</code>.
   *
   * @param index    zero-based post index
   * @param document parsed group log
   * @return the day, or -1 when there is no post at that index
   * @throws NumberFormatException if the attribute is missing or not an integer
   */
  protected static int getDay(int index, Document document)
  {
    Element post = postAt(index, document);

    return post != null ? Integer.parseInt(post.getAttribute("day")) : -1;
  }

  /**
   * Returns the "name" attribute of the root element.
   *
   * @param document parsed group log
   * @return the group name (empty string when the attribute is absent)
   */
  protected static String getGroup(Document document)
  {
    return document.getDocumentElement().getAttribute("name");
  }

  /**
   * Counts the posts whose "day" attribute equals <code>day</code>.
   *
   * Scanning stops at the first post with a greater day, so the result is
   * only complete when posts are stored in ascending day order.
   *
   * @param day      day number to count
   * @param document parsed group log
   * @return number of matching posts found before the scan stopped
   * @throws NumberFormatException if a scanned post has a missing or
   *                               non-integer "day" attribute
   */
  protected static int getDayCount(int day, Document document)
  {
    Element[] posts = childElements(document.getDocumentElement());

    int count = 0;

    for (int i = 0; i < posts.length; i++)
    {
      int postDay = Integer.parseInt(posts[i].getAttribute("day"));

      if (postDay > day)
        break;

      if (postDay == day)
        count++;
    }

    return count;
  }

  /**
   * Returns the number of posts (element children of the root).
   *
   * @param document parsed group log
   * @return post count
   */
  protected static int getCount(Document document)
  {
    return childElements(document.getDocumentElement()).length;
  }

  /**
   * Returns the "subject" attribute of the post at <code>index</code>.
   *
   * @param index    zero-based post index
   * @param document parsed group log
   * @return the subject, or <code>null</code> when there is no post at that index
   */
  protected static String getSubject(int index, Document document)
  {
    Element post = postAt(index, document);

    return post != null ? post.getAttribute("subject") : null;
  }

  /**
   * Returns the "volume" attribute of the root element as an integer.
   *
   * @param document parsed group log
   * @return the volume
   * @throws NumberFormatException if the attribute is missing or not an integer
   */
  protected static int getVolume(Document document)
  {
    return Integer.parseInt(document.getDocumentElement().getAttribute("volume"));
  }

  /**
   * Writes the post at <code>index</code> to <code>outfile</code>: a blank
   * line, the Date/Subject/References/From header lines, a "---" separator
   * and then every body line.
   *
   * @param outfile  destination writer (not closed by this method)
   * @param index    zero-based post index
   * @param document parsed group log
   * @throws IOException               if writing fails
   * @throws IndexOutOfBoundsException if there is no post at that index
   */
  protected static void exportPost(FileWriter outfile, int index, Document document) throws IOException
  {
    writePost(outfile, requirePost(index, document));
  }

  /**
   * Returns the "article" attribute of the post at <code>index</code>.
   *
   * @param index    zero-based post index
   * @param document parsed group log
   * @return the article number, or -1 when there is no post at that index
   * @throws NumberFormatException if the attribute is missing or not an integer
   */
  protected static int getArticle(int index, Document document)
  {
    Element post = postAt(index, document);

    return post != null ? Integer.parseInt(post.getAttribute("article")) : -1;
  }

  /**
   * Classifies the body of the post at <code>index</code>.
   *
   * @param index    zero-based post index
   * @param document parsed group log
   * @return ":UK:" when some body line starts with "&gt;", ends with "wrote:"
   *         or contains "wrote in message"; "T" otherwise (including when the
   *         post has no body)
   * @throws IndexOutOfBoundsException if there is no post at that index
   */
  protected static String identifyStructure(int index, Document document)
  {
    return classifyPost(requirePost(index, document));
  }

  /** Writes the export file for one group; the writer is closed even when a post fails. */
  private static void exportGroup(String path, Document document) throws IOException
  {
    String groupName = getGroup(document);
    String newline = System.getProperty("line.separator");

    // Resolve the post list once instead of once per post.
    Element[] posts = childElements(document.getDocumentElement());

    FileWriter outfile = new FileWriter(path + "/" + groupName + ".txt");

    try
    {
      for (int j = 0; j < posts.length; j++)
      {
        String structure = classifyPost(posts[j]);

        outfile.write("*** POST " + groupName + " " + j + " " + structure + newline);

        if (!structure.equals("T"))
          writePost(outfile, posts[j]);

        outfile.write(newline);
      }

      outfile.flush();
    }
    finally
    {
      outfile.close();
    }
  }

  /** Writes one post (filtered header, "---", body) to the given writer. */
  private static void writePost(FileWriter outfile, Element post) throws IOException
  {
    String newline = System.getProperty("line.separator");

    outfile.write(newline);

    // components[0] is the header, components[1] the body (when present).
    Element[] components = childElements(post);

    if (components.length > 0)
    {
      NodeList headerLines = components[0].getChildNodes();

      for (int i = 0; i < headerLines.getLength(); i++)
      {
        String line = lineText(headerLines, i);

        if (line == null)
          outfile.write(newline);
        else if (line.startsWith("Date:") || line.startsWith("Subject:") || line.startsWith("References:") || line.startsWith("From:"))
          outfile.write(line + newline);
      }
    }

    outfile.write("---" + newline);

    if (components.length > 1)
    {
      NodeList bodyLines = components[1].getChildNodes();

      for (int i = 0; i < bodyLines.getLength(); i++)
      {
        String line = lineText(bodyLines, i);

        if (line == null)
          outfile.write(newline);
        else
          outfile.write(line + newline);
      }
    }
  }

  /** Returns ":UK:" when the post body contains a quoting/attribution line, otherwise "T". */
  private static String classifyPost(Element post)
  {
    Element[] components = childElements(post);

    // A post without a body has nothing to match.
    if (components.length < 2)
      return "T";

    NodeList bodyLines = components[1].getChildNodes();

    for (int i = 0; i < bodyLines.getLength(); i++)
    {
      String line = lineText(bodyLines, i);

      if (line == null)
        continue;

      if (line.startsWith(">") || line.endsWith("wrote:") || line.indexOf("wrote in message") != -1)
        return ":UK:";
    }

    return "T";
  }

  /**
   * Returns the text held by the first child of the i-th node, or null when
   * that node has no children. The first child, when present, is expected to
   * be a Text node.
   */
  private static String lineText(NodeList lines, int i)
  {
    Text text = (Text)lines.item(i).getFirstChild();

    return text != null ? text.getData() : null;
  }

  /** Returns the post at the given index, or null when the index is out of range. */
  private static Element postAt(int index, Document document)
  {
    if (index < 0)
      return null;

    Element[] posts = childElements(document.getDocumentElement());

    return index < posts.length ? posts[index] : null;
  }

  /** Returns the post at the given index or throws IndexOutOfBoundsException. */
  private static Element requirePost(int index, Document document)
  {
    Element post = postAt(index, document);

    if (post == null)
      throw new IndexOutOfBoundsException("No post at index " + index);

    return post;
  }

  /** Returns the element children of a node in document order, skipping text and comments. */
  private static Element[] childElements(Element parent)
  {
    NodeList children = parent.getChildNodes();

    int count = 0;

    for (int i = 0; i < children.getLength(); i++)
    {
      if (children.item(i) instanceof Element)
        count++;
    }

    Element[] result = new Element[count];

    int next = 0;

    for (int i = 0; i < children.getLength(); i++)
    {
      if (children.item(i) instanceof Element)
        result[next++] = (Element)children.item(i);
    }

    return result;
  }

  /** Reports a log file that was skipped because it could not be processed. */
  private static void reportSkipped(File source, Exception cause)
  {
    System.err.println("Skipping " + source.getPath() + ": " + cause);
  }
}
